{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Association Rule - Apriori and ECLAT \n",
    "\n",
    "Training association rule models (Apriori and ECLAT) to find the most related items bought by customers of a French supermarket during a week. Each of the 7501 rows of the dataset represents the items bought by a single customer during this week.\n",
    "\n",
    "These algorithms associate the products that most customers tend to buy together. The results can be used to generate product recommendations and to guide the product display strategy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Importing the libraries\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Data Loading\n",
    "dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)\n",
    "\n",
    "# Adding all customers into a list of lists, one inner list per row of the file.\n",
    "# The row and column counts come from the data itself instead of being hard-coded.\n",
    "# Empty cells (NaN) become the string 'nan' through str(); later cells expect that marker.\n",
    "transactions = [[str(item) for item in row] for row in dataset.values]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "      <th>15</th>\n",
       "      <th>16</th>\n",
       "      <th>17</th>\n",
       "      <th>18</th>\n",
       "      <th>19</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>shrimp</td>\n",
       "      <td>almonds</td>\n",
       "      <td>avocado</td>\n",
       "      <td>vegetables mix</td>\n",
       "      <td>green grapes</td>\n",
       "      <td>whole weat flour</td>\n",
       "      <td>yams</td>\n",
       "      <td>cottage cheese</td>\n",
       "      <td>energy drink</td>\n",
       "      <td>tomato juice</td>\n",
       "      <td>low fat yogurt</td>\n",
       "      <td>green tea</td>\n",
       "      <td>honey</td>\n",
       "      <td>salad</td>\n",
       "      <td>mineral water</td>\n",
       "      <td>salmon</td>\n",
       "      <td>antioxydant juice</td>\n",
       "      <td>frozen smoothie</td>\n",
       "      <td>spinach</td>\n",
       "      <td>olive oil</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>burgers</td>\n",
       "      <td>meatballs</td>\n",
       "      <td>eggs</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>chutney</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>turkey</td>\n",
       "      <td>avocado</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>mineral water</td>\n",
       "      <td>milk</td>\n",
       "      <td>energy bar</td>\n",
       "      <td>whole wheat rice</td>\n",
       "      <td>green tea</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              0          1           2                 3             4   \\\n",
       "0         shrimp    almonds     avocado    vegetables mix  green grapes   \n",
       "1        burgers  meatballs        eggs               NaN           NaN   \n",
       "2        chutney        NaN         NaN               NaN           NaN   \n",
       "3         turkey    avocado         NaN               NaN           NaN   \n",
       "4  mineral water       milk  energy bar  whole wheat rice     green tea   \n",
       "\n",
       "                 5     6               7             8             9   \\\n",
       "0  whole weat flour  yams  cottage cheese  energy drink  tomato juice   \n",
       "1               NaN   NaN             NaN           NaN           NaN   \n",
       "2               NaN   NaN             NaN           NaN           NaN   \n",
       "3               NaN   NaN             NaN           NaN           NaN   \n",
       "4               NaN   NaN             NaN           NaN           NaN   \n",
       "\n",
       "               10         11     12     13             14      15  \\\n",
       "0  low fat yogurt  green tea  honey  salad  mineral water  salmon   \n",
       "1             NaN        NaN    NaN    NaN            NaN     NaN   \n",
       "2             NaN        NaN    NaN    NaN            NaN     NaN   \n",
       "3             NaN        NaN    NaN    NaN            NaN     NaN   \n",
       "4             NaN        NaN    NaN    NaN            NaN     NaN   \n",
       "\n",
       "                  16               17       18         19  \n",
       "0  antioxydant juice  frozen smoothie  spinach  olive oil  \n",
       "1                NaN              NaN      NaN        NaN  \n",
       "2                NaN              NaN      NaN        NaN  \n",
       "3                NaN              NaN      NaN        NaN  \n",
       "4                NaN              NaN      NaN        NaN  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows; NaN marks the unused item slots of a transaction\n",
    "dataset.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Apriori implementation using apyori library \n",
    "source: https://github.com/ymoch/apyori\n",
    "\n",
    "The goal of this part is to find, using the Apriori algorithm, which products tend to be bought together more often than other combinations.\n",
    "\n",
    "This code is based on a lecture from the course Machine Learning A-Z™ by Kirill Eremenko: https://www.udemy.com/machinelearning/learn/v4/overview. I added some transformations to put the results into dataframes and to make the visualization easier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['shrimp',\n",
       "  'almonds',\n",
       "  'avocado',\n",
       "  'vegetables mix',\n",
       "  'green grapes',\n",
       "  'whole weat flour',\n",
       "  'yams',\n",
       "  'cottage cheese',\n",
       "  'energy drink',\n",
       "  'tomato juice',\n",
       "  'low fat yogurt',\n",
       "  'green tea',\n",
       "  'honey',\n",
       "  'salad',\n",
       "  'mineral water',\n",
       "  'salmon',\n",
       "  'antioxydant juice',\n",
       "  'frozen smoothie',\n",
       "  'spinach',\n",
       "  'olive oil'],\n",
       " ['burgers',\n",
       "  'meatballs',\n",
       "  'eggs',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan',\n",
       "  'nan']]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first two transactions as plain Python lists\n",
    "transactions[0:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Training Apriori on the dataset\n",
    "# The hyperparameters chosen for this training are:\n",
    "# min_support = items bought more than 3 times a day * 7 days (week) / 7500 customers = 0.0028 (rounded to 0.003)\n",
    "# min_confidence: at least 20%, min_lift = minimum of 3 (less than that is too low)\n",
    "\n",
    "from apyori import apriori\n",
    "\n",
    "# Drop the 'nan' padding strings first; otherwise missing cells are mined as if they were\n",
    "# a product and every rule shows up a second time with 'nan' attached.\n",
    "baskets = [[item for item in basket if item != 'nan'] for basket in transactions]\n",
    "\n",
    "# NOTE(review): TODO confirm that apyori actually honours min_length.\n",
    "rules = apriori(baskets, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Collect every rule produced by apriori into a list so it can be indexed and reused\n",
    "results = []\n",
    "results.extend(rules)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# For every mined rule keep its item set and the lift of its first ordered statistic.\n",
    "# Iterating over the records directly avoids copying the whole result list on every step.\n",
    "lift = []\n",
    "association = []\n",
    "for record in results:\n",
    "    # record[0] holds the items of the rule; record[2][0][3] is the lift value read by the original code\n",
    "    lift.append(record[2][0][3])\n",
    "    association.append(list(record[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Visualizing results in a dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Build the frame with named columns directly. Transposing a two-row frame would turn\n",
    "# both columns into object dtype; this way Lift stays numeric.\n",
    "rank = pd.DataFrame({'Association': association, 'Lift': lift}, columns=['Association', 'Lift'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Association</th>\n",
       "      <th>Lift</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>[olive oil, whole wheat pasta, mineral water, ...</td>\n",
       "      <td>6.11586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>[olive oil, whole wheat pasta, mineral water]</td>\n",
       "      <td>6.11586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>[soup, mineral water, frozen vegetables, milk]</td>\n",
       "      <td>5.48441</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>[soup, mineral water, frozen vegetables, nan, ...</td>\n",
       "      <td>5.48441</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>[honey, fromage blanc, nan]</td>\n",
       "      <td>5.16427</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[honey, fromage blanc]</td>\n",
       "      <td>5.16427</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>[chicken, nan, light cream]</td>\n",
       "      <td>4.84395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[chicken, light cream]</td>\n",
       "      <td>4.84395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[pasta, escalope]</td>\n",
       "      <td>4.70081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>[pasta, escalope, nan]</td>\n",
       "      <td>4.70081</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           Association     Lift\n",
       "128  [olive oil, whole wheat pasta, mineral water, ...  6.11586\n",
       "58       [olive oil, whole wheat pasta, mineral water]  6.11586\n",
       "96      [soup, mineral water, frozen vegetables, milk]  5.48441\n",
       "146  [soup, mineral water, frozen vegetables, nan, ...  5.48441\n",
       "28                         [honey, fromage blanc, nan]  5.16427\n",
       "3                               [honey, fromage blanc]  5.16427\n",
       "16                         [chicken, nan, light cream]  4.84395\n",
       "0                               [chicken, light cream]  4.84395\n",
       "2                                    [pasta, escalope]  4.70081\n",
       "26                              [pasta, escalope, nan]  4.70081"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ten rules with the highest lift, strongest first\n",
    "rank.sort_values(by='Lift', ascending=False).iloc[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "According to this study, \"olive oil, whole wheat pasta, mineral water\" is the combination with the highest lift, i.e. the most strongly associated items of this week for the supermarket in question.  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ECLAT Implementation\n",
    "\n",
    "This is an implementation of the ECLAT code by hand. It calculates how frequently each pair of items has been bought, compared to other pairs. At the end, we expect to see the most common combination of products during the week. \n",
    "\n",
    "An extension of the code can calculate the most common combinations of three items, four items, and so on."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Getting the list of products bought this week by all customers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Putting all transactions in a single list\n",
    "itens = [item for basket in transactions for item in basket]\n",
    "\n",
    "# Unique items without the 'nan' placeholder. A set difference does not fail when no\n",
    "# placeholder is present, and sorting makes the item order (and therefore the row\n",
    "# indices of the combinations built from it) the same on every run.\n",
    "uniqueItems = sorted(set(itens) - {'nan'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# test code\n",
    "#tra = [s for s in transactions if (\"mineral water\") in s and (\"ground beef\") in s and (\"shrimp\") in s]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating combinations with the items - pairs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Every unordered pair of distinct items, each exactly once.\n",
    "# The inner range starts just past j, so no index can run off the end of the list\n",
    "# and no IndexError has to be swallowed.\n",
    "n_items = len(uniqueItems)\n",
    "pair = [[uniqueItems[j], uniqueItems[k]]\n",
    "        for j in range(n_items)\n",
    "        for k in range(j + 1, n_items)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Calculating score\n",
    "The score is the number of customers that bought both items (the pair) divided by the number of all customers of the week (7501). This calculation is done for every possible pair and the scores are stored in the \"score\" list.\n",
    "\n",
    "$$\\text{score}(x, y) = \\frac{\\text{number of lists that contain both item } x \\text{ and item } y}{\\text{number of all lists}}$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Vertical (ECLAT-style) layout: map every item to the set of ids of the transactions that contain it\n",
    "tidsets = {}\n",
    "for tid, basket in enumerate(transactions):\n",
    "    for product in basket:\n",
    "        tidsets.setdefault(product, set()).add(tid)\n",
    "\n",
    "# Score of a pair = share of transactions whose id appears in the tidsets of both items.\n",
    "# This replaces building source text and running it through eval, and uses the real\n",
    "# number of transactions instead of a hard-coded 7501.\n",
    "n_transactions = float(len(transactions))\n",
    "score = [len(tidsets.get(first, set()) & tidsets.get(second, set())) / n_transactions\n",
    "         for first, second in pair]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Showing results\n",
    "\n",
    "Top 10 Most common pairs of items of this week"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Build the ranking with named columns directly so that Score keeps a numeric dtype\n",
    "ranking_ECLAT = pd.DataFrame({'Pair': pair, 'Score': score}, columns=['Pair', 'Score'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pair</th>\n",
       "      <th>Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3809</th>\n",
       "      <td>[spaghetti, mineral water]</td>\n",
       "      <td>0.0597254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6389</th>\n",
       "      <td>[chocolate, mineral water]</td>\n",
       "      <td>0.0526596</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7096</th>\n",
       "      <td>[mineral water, eggs]</td>\n",
       "      <td>0.0509265</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689</th>\n",
       "      <td>[milk, mineral water]</td>\n",
       "      <td>0.0479936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6002</th>\n",
       "      <td>[ground beef, mineral water]</td>\n",
       "      <td>0.0409279</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3779</th>\n",
       "      <td>[spaghetti, chocolate]</td>\n",
       "      <td>0.0391948</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3770</th>\n",
       "      <td>[spaghetti, ground beef]</td>\n",
       "      <td>0.0391948</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3811</th>\n",
       "      <td>[spaghetti, eggs]</td>\n",
       "      <td>0.0365285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6604</th>\n",
       "      <td>[french fries, eggs]</td>\n",
       "      <td>0.0363951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1877</th>\n",
       "      <td>[frozen vegetables, mineral water]</td>\n",
       "      <td>0.0357286</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    Pair      Score\n",
       "3809          [spaghetti, mineral water]  0.0597254\n",
       "6389          [chocolate, mineral water]  0.0526596\n",
       "7096               [mineral water, eggs]  0.0509265\n",
       "689                [milk, mineral water]  0.0479936\n",
       "6002        [ground beef, mineral water]  0.0409279\n",
       "3779              [spaghetti, chocolate]  0.0391948\n",
       "3770            [spaghetti, ground beef]  0.0391948\n",
       "3811                   [spaghetti, eggs]  0.0365285\n",
       "6604                [french fries, eggs]  0.0363951\n",
       "1877  [frozen vegetables, mineral water]  0.0357286"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ten most frequent pairs, highest score first\n",
    "ranking_ECLAT.sort_values(by='Score', ascending=False).iloc[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What if we do that for trios?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Creating trios: every unordered combination of three distinct items, each exactly once.\n",
    "# The second and third items are indexed by k and l themselves. Indexing them as j + k and\n",
    "# j + l skips many combinations for every j > 0 (e.g. items 1, 2, 3 are never produced).\n",
    "n_items = len(uniqueItems)\n",
    "trio = [[uniqueItems[j], uniqueItems[k], uniqueItems[l]]\n",
    "        for j in range(n_items)\n",
    "        for k in range(j + 1, n_items)\n",
    "        for l in range(k + 1, n_items)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['pet food', 'green tea', 'whole wheat rice'],\n",
       " ['pet food', 'green tea', 'antioxydant juice'],\n",
       " ['pet food', 'green tea', 'chicken'],\n",
       " ['pet food', 'green tea', 'milk'],\n",
       " ['pet food', 'green tea', 'mint green tea']]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first five generated trios\n",
    "trio[0:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Vertical (ECLAT-style) layout: map every item to the set of ids of the transactions that contain it\n",
    "tidsets = {}\n",
    "for tid, basket in enumerate(transactions):\n",
    "    for product in basket:\n",
    "        tidsets.setdefault(product, set()).add(tid)\n",
    "\n",
    "# Score of a trio = share of transactions whose id appears in the tidsets of all three items.\n",
    "# This replaces building source text and running it through eval, and uses the real\n",
    "# number of transactions instead of a hard-coded 7501.\n",
    "n_transactions = float(len(transactions))\n",
    "score_trio = []\n",
    "for combo in trio:\n",
    "    item_tids = [tidsets.get(product, set()) for product in combo]\n",
    "    score_trio.append(len(set.intersection(*item_tids)) / n_transactions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Trio</th>\n",
       "      <th>Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>134586</th>\n",
       "      <td>[spaghetti, chocolate, mineral water]</td>\n",
       "      <td>0.0158646</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35350</th>\n",
       "      <td>[milk, spaghetti, mineral water]</td>\n",
       "      <td>0.0157312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135293</th>\n",
       "      <td>[spaghetti, mineral water, eggs]</td>\n",
       "      <td>0.0142648</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37930</th>\n",
       "      <td>[milk, chocolate, mineral water]</td>\n",
       "      <td>0.0139981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38637</th>\n",
       "      <td>[milk, mineral water, eggs]</td>\n",
       "      <td>0.0130649</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86786</th>\n",
       "      <td>[frozen vegetables, spaghetti, mineral water]</td>\n",
       "      <td>0.0119984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37543</th>\n",
       "      <td>[milk, ground beef, mineral water]</td>\n",
       "      <td>0.0110652</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33418</th>\n",
       "      <td>[milk, frozen vegetables, mineral water]</td>\n",
       "      <td>0.0110652</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35320</th>\n",
       "      <td>[milk, spaghetti, chocolate]</td>\n",
       "      <td>0.0109319</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134588</th>\n",
       "      <td>[spaghetti, chocolate, eggs]</td>\n",
       "      <td>0.0105319</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 Trio      Score\n",
       "134586          [spaghetti, chocolate, mineral water]  0.0158646\n",
       "35350                [milk, spaghetti, mineral water]  0.0157312\n",
       "135293               [spaghetti, mineral water, eggs]  0.0142648\n",
       "37930                [milk, chocolate, mineral water]  0.0139981\n",
       "38637                     [milk, mineral water, eggs]  0.0130649\n",
       "86786   [frozen vegetables, spaghetti, mineral water]  0.0119984\n",
       "37543              [milk, ground beef, mineral water]  0.0110652\n",
       "33418        [milk, frozen vegetables, mineral water]  0.0110652\n",
       "35320                    [milk, spaghetti, chocolate]  0.0109319\n",
       "134588                   [spaghetti, chocolate, eggs]  0.0105319"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the ranking with named columns directly so that Score keeps a numeric dtype,\n",
    "# then show the ten most frequent trios, highest score first\n",
    "ranking_ECLAT_trio = pd.DataFrame({'Trio': trio, 'Score': score_trio}, columns=['Trio', 'Score'])\n",
    "ranking_ECLAT_trio.sort_values('Score', ascending=False).head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## What about comparing the results from Apriori and ECLAT?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Apriori found that the combination with the most \"attractiveness power\" (the highest lift) is \"olive oil\", \"whole wheat pasta\" and \"mineral water\". If we run the ECLAT code for this set of items, we obtain a score of 0.0039.\n",
    "\n",
    "This set of 3 items does not score high enough to be placed among the top 10, but the two methods measure different things. According to Apriori, these are the items where picking one leads to the others more frequently than in other combinations, i.e. when a person picks 'olive oil', the probability of also picking 'whole wheat pasta' and 'mineral water' is much higher than for another combination. ECLAT, on the other hand, just ranks the most common combinations across all lists, without considering how one item by itself can influence the purchase of another."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Transactions that contain all three items of the top Apriori rule.\n",
    "# A direct membership test replaces generating source text and running it through eval.\n",
    "i = [\"olive oil\", \"whole wheat pasta\", \"mineral water\"]\n",
    "tra = [s for s in transactions if all(item in s for item in i)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Score for \"olive oil\", \"whole wheat pasta\", \"mineral water\": 0.00386615117984\n"
     ]
    }
   ],
   "source": [
    "# A single formatted string runs under both Python 2 and Python 3 (the print statement is\n",
    "# Python 2 only); the denominator is the actual number of transactions, not a hard-coded 7501\n",
    "print('Score for \"olive oil\", \"whole wheat pasta\", \"mineral water\": %s' % (len(tra) / float(len(transactions))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
